/* 

Name : prepare_data_decile.do 
Authors: Martin, Méjean, Parenti
Last update: Sept. 15, 2023  
Aim : construct a dataset with info on the length of transactions, by product, destination and size decile 


*/ 

cd $firmpath
use $dataset, clear
collapse (count) nbs=export (sum) export (max) max=export, by(month year iso2 buyer $product)
g multi=nbs!=1 
egen _=sum(export), by(multi) 
egen __=sum(max), by(multi)
tab multi _ // 6.5% des flux, 25% en valeur 
tab multi __ // 93.5% of trade btw the buyers and their main supplier
drop _ __ 
egen totn=count(export),  by($product)
egen totx=sum(export),  by($product)
keep if multi==1 
collapse (sum) export (count) nb=export (mean) totn totx, by($product) 
g sh_multisupplier=nb/totn 
g shv_multisupplier=export/totx
keep sh* $product 
save multisupplier_by$product, replace


use $dataset, clear
sort siren year month
by siren: g _=1 if _n==1
by siren: g __=1 if _n==_N
bys siren year month: egen firstdate_siren=mean(_)
bys siren year month: egen lastdate_siren=mean(__)
drop _ __

g firstdate_sample=firstdate_siren*year 


sort siren buyer iso2 year month
by siren buyer iso2: g _=(_n==1)*year
bys siren buyer iso2: egen firstdate_pair=max(_)
drop _ 

bys buyer iso2 $product: egen _=count(export)
replace _=0 if _!=1
save temp, replace

bys buyer iso2 $product: keep if _n==1
g export_one=_*export
sort iso2 $product
collapse (count) count_bp=_ (sum) count_bp_one=_ export export_one, by($product)
g share_onetransaction=count_bp_one/count_bp
g vshare_onetransaction=export_one/export
save share_onetransactionhistories_by$product, replace

collapse (sum) count_bp count_bp_one (sum) export export_one
g share_onetransaction=count_bp_one/count_bp
di share_onetransaction // .4412356
use temp, clear
drop if _==1
drop _
save temp, replace
egen value=sum(export)
quietly sum value
local count=r(N)
local tot=r(mean)
id_group id_s "siren"
id_group id_b "buyer"
id_group id_sp "siren $product"
id_group id_bp "buyer $product"
id_group id_sbp "siren buyer $product"
foreach i in s b bp sp sbp{
	quietly sum id_`i'
	local count_`i'=r(max)
}
local s=$endyear+1
local t=$endyear+2
local u=$endyear+3
putexcel set $outputpath/SummaryStatistics_$firstyear$lastyear , sheet("Coverage") modify
#delimit ; 
putexcel A`u'=("$firstyear-$lastyear") B`s'=("Less one-transaction buyer*seller") 
 B`t'=("Count") C`t'=("Value") D`t'=("Count siren") E`t'=("Count buyer") F`t'=("Count siren*product") G`t'=("Count buyer*product") H`t'=("Count siren*buyer*product")
 B`u'=(`count') C`u'=(`tot') D`u'=(`count_s') E`u'=(`count_b') F`u'=(`count_sp') G`u'=(`count_bp') H`u'=(`count_sbp')
; 
#delimit cr 
local l=$endcountry+1
local lp=$endcountry+2
local lpp=$endcountry+3
putexcel set $outputpath/SummaryStatistics_$firstyear$lastyear , sheet("Coverage Country") modify
#delimit ; 
putexcel A`lpp'=("All") B`l'=("Less one-transaction buyer*seller") 
 B`lp'=("Count") C`lp'=("Value") D`lp'=("Count siren") E`lp'=("Count buyer") F`lp'=("Count siren*product") G`lp'=("Count buyer*product") H`lp'=("Count siren*buyer*product")
 B`lpp'=(`count') C`lpp'=(`tot') D`lpp'=(`count_s') E`lpp'=(`count_b') F`lpp'=(`count_sp') G`lpp'=(`count_bp') H`lpp'=(`count_sbp')
; 
#delimit cr 
drop value id_s id_b id_sp id_sbp id_bp
egen tot=sum(export), by(year)
local j=1
forvalues year=$firstyear(1)$lastyear{
	local k=`u'+`j'
	quietly sum tot if year==`year'
	local tot=r(mean)
	local count=r(N)
	preserve
	keep if year==`year'
	id_group id_s "siren"
	id_group id_b "buyer"
	id_group id_bp "buyer $product"
	id_group id_sp "siren $product"
	id_group id_sbp "siren buyer $product"
	foreach i in s b sp bp sbp{
		quietly sum id_`i'
		local count_`i'=r(max)
	}
	restore
	putexcel set $outputpath/SummaryStatistics_$firstyear$lastyear , sheet("Coverage") modify
	putexcel A`k'=("`year'") B`k'=(`count') C`k'=(`tot') D`k'=(`count_s') E`k'=(`count_b')  F`k'=(`count_sp') G`k'=(`count_bp') H`k'=(`count_sbp')
	local j=`j'+1
}
global endyear=`k'
drop tot 
local j=1
foreach i in $countrysample{
	local m=`lpp'+`j'
	preserve
	keep if iso2=="`i'"
	egen tot=sum(export)
	quietly sum tot
	local tot=r(mean)
	local count=r(N)
	id_group id_s "siren"
	id_group id_b "buyer"
	id_group id_sp "siren $product"
	id_group id_bp "buyer $product"
	id_group id_sbp "siren buyer $product"
	foreach q in s b sp bp sbp{
		quietly sum id_`q'
		local count_`q'=r(max)
	}
	restore
	putexcel set $outputpath/SummaryStatistics_$firstyear$lastyear , sheet("Coverage Country") modify
	putexcel A`m'=("`i'") B`m'=(`count') C`m'=(`tot') D`m'=(`count_s') E`m'=(`count_b')  F`m'=(`count_sp') G`m'=(`count_bp') H`m'=(`count_sbp')
	local j=`j'+1
}
global endcountry=`m'

*** we merge with French PPI to have exports in 2010 euros 

use temp, clear
sort year month 
merge m:1 year month using $countrypath/ppi_france // monthly PPI data from INSEE 
keep if _m==3 
drop _m 
rename export export_n 
g export = export_n/ppi*100
label var export "real exports (deflated by PPI (2010=100))"
save new_$dataset, replace

*** here we compute the length of transactions
construct_base_4decile new_$dataset base_4decile$firstyear$lastyear "$time_constraint" "$time_constraint2" whole "$countrysample" // "$countrysample"
// construct_base4decile is the name of the program, 
// new_$dataset$firstyear$lastyear is the name of the input database, 
// base_4decile$firstyear$lastyear is the name of the output, 
// the parts under " " are the restrictions we impose: 
// $time_constraint treats right-censoring
// $time_constraint2 treats left-censoring
// this part is a bit boring => need to identify continuous relationships and compute their duration 
 

*** build our group variables on the final sample 
use base_4decile$firstyear$lastyear, clear
sort iso2 id_bp id_bs id_bs_continuous
id_group _ "iso2 $product id_bp" // id_group does the same job as egen group (but work if we have millions of groups)
drop id_bp
rename _ id_bp
id_group _ "iso2 $product id_bs"
drop id_bs
rename _ id_bs
id_group _ "iso2 $product id_bs_continuous"
drop id_bs_continuous
rename _ id_bs_continuous
save base_4decile$firstyear$lastyear, replace

*** compute size decile  // this is the time-consuming part. 

use base_4decile$firstyear$lastyear, clear
egen p=group($product)
drop if export==0
g dur=length_bps
g size=export 
collapse (mean) size dur  , by(id_bs_continuous $product iso2) 
g lsize=log(size)
egen _=mean(lsize), by($product)
g lsize_= size- _ 
egen min=pctile(lsize_), by($dim_decile)  p(1) 
egen max=pctile(lsize_), by($dim_decile)  p(99) 
egen decile=xtile(lsize_) , by($dim_decile) nq(10) 
drop if lsize_<min | lsize_>max // we truncate the first and tenth deciles to fit with the theoretical framework (see appendix) 
save base_4decile$firstyear$lastyear$variant, replace


